0.1 Objective

The objective of this notebook is to perform pre-processing and dimensional reduction on both assays independently, using standard approaches for RNA and ATAC-seq data. Then, we will follow the “Joint RNA and ATAC analysis: 10x multiomic” vignette from Signac to obtain a joint visualization using both modalities.

1 Pre-processing

1.1 Load packages

library("RColorBrewer")
library(Signac)
library(Seurat)
library(GenomicRanges)
library(future)
#library(SeuratWrappers)
library(harmony)
library(EnsDb.Hsapiens.v86)
library(stringr)
library(dplyr)
library(ggplot2)
library(patchwork)

set.seed(173)

1.2 Parameters

path_to_object <- ("/Users/mlromeror/Documents/multiome_tonsil_Lucia/results/R_objects/7.tonsil_filtered_merged_with_metadata.rds")
path_to_save <- ("/Users/mlromeror/Documents/multiome_tonsil_Lucia/results/R_objects/8.tonsil_multiome_integrated_using_wnn.rds")

1.3 Load Multiome filtered data with metadata

tonsil_filtered <- readRDS(path_to_object)
tonsil_filtered
## An object of class Seurat 
## 188615 features across 70556 samples within 2 assays 
## Active assay: RNA (36601 features, 0 variable features)
##  1 other assay present: peaks

change library_name column name

colnames(tonsil_filtered@meta.data)[16]<-"library_name"

1.4 scATAC

1.4.1 Normalization and linear dimensional reduction

We exclude the first dimension as this is typically correlated with sequencing depth Cells cluster completely separately in ATAC without harmony; so run harmony after SVD

DefaultAssay(tonsil_filtered) <- "peaks"
tonsil_filtered <- RunTFIDF(tonsil_filtered)
## Performing TF-IDF normalization
tonsil_filtered <- FindTopFeatures(tonsil_filtered, min.cutoff = "q0")
tonsil_filtered <- RunSVD(tonsil_filtered)
## Running SVD
## Scaling cell embeddings

1.4.2 Plot the Depth correlation plot

Compute the correlation between total counts and each reduced dimension component.

LSI component is typically highly correlated with sequencing depth. The first LSI component often captures sequencing depth (technical variation) rather than biological variation. If this is the case, the component should be removed from downstream analysis. We can assess the correlation between each LSI component and sequencing depth using the DepthCor() function:

For scRNA-seq data we don’t typically observe such a strong relationship between the first PC and sequencing depth, and so usually retain the first PC in downstream analyses.

DepthCor(tonsil_filtered)

Here we see there is a very strong correlation between the first LSI component and the total number of counts for the cell, so we will perform downstream steps without this component.

1.4.3 UMAP representation

  • dimensional reduction key, specifies the string before the number for the dimension names. UMAP by default
  • reduction.name: Name to store dimensional reduction under in the Seurat object
tonsil_filtered <- RunUMAP(
  tonsil_filtered,
  dims = 2:40,
  reduction = "lsi",
  reduction.name = "umap.atac",
  reduction.key = "atacUMAP_"
)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 16:48:48 UMAP embedding parameters a = 0.9922 b = 1.112
## 16:48:48 Read 70556 rows and found 39 numeric columns
## 16:48:48 Using Annoy for neighbor search, n_neighbors = 30
## 16:48:48 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:48:58 Writing NN index file to temp file /var/folders/kz/y10np0cj213fybqz181z9lghbzr42p/T//RtmpXJ0J8c/file3bd11a83e32b
## 16:48:58 Searching Annoy index using 1 thread, search_k = 3000
## 16:49:32 Annoy recall = 100%
## 16:49:33 Commencing smooth kNN distance calibration using 1 thread
## 16:49:37 Initializing from normalized Laplacian + noise
## 16:49:40 Commencing optimization for 200 epochs, with 3114822 positive edges
## 16:50:26 Optimization finished
atac.umap<-DimPlot(
  tonsil_filtered,
  reduction = "umap.atac",
  group.by = "library_name",
  pt.size = 0.1
) + ggtitle('scATAC UMAP') + NoLegend()

atac.umap

#split_by: library ,edad, genero

1.5 scRNA

1.5.1 Normalization and linear dimensional reduction-

1.5.2 NormalizeData

DefaultAssay(tonsil_filtered) <- "RNA"
tonsil_filtered <- NormalizeData(
  tonsil_filtered,
  normalization.method = "LogNormalize",
  scale.factor = 1e4
)

tonsil_filtered <- tonsil_filtered %>%
  FindVariableFeatures(nfeatures = 3000) %>%
  ScaleData() %>% 
  RunPCA() 
## Centering and scaling data matrix
## PC_ 1 
## Positive:  FYB1, BCL2, INPP4B, TC2N, BCL11B, THEMIS, ANK3, IL7R, CD96, PRKCH 
##     ITK, CD247, AAK1, IL6ST, TNIK, IQGAP2, SERINC5, NR3C2, AC105402.3, CD3D 
##     LINC00861, IPCEF1, RBMS1, LEF1, MLLT3, SLC9A9, FKBP5, PLCL1, TSHZ2, IL32 
## Negative:  MKI67, MYBL1, TOP2A, AC023590.1, STMN1, NUSAP1, AFF2, RGS13, RAPGEF5, DIAPH3 
##     ASPM, GALNT14, NUGGC, NCAPG, KNL1, HMGB2, BUB1B, TPX2, KIF11, POLQ 
##     RRM2, CDK1, ANLN, PDGFD, AURKB, SHCBP1, GTSE1, MELK, TUBA1B, PCLAF 
## PC_ 2 
## Positive:  TCF4, FCRL5, COL19A1, CD83, LRRK2, ZDHHC14, KYNU, KHDRBS2, ENTPD1, CCSER1 
##     IGHM, CR1, CR2, ZEB2, LARGE1, RHEX, TBC1D9, IGHD, KLHL14, JADE3 
##     HLA-DRB5, MTSS1, STEAP1B, MYO1E, FCRL3, IGKC, MCTP2, VAV3-AS1, AL355076.2, EAF2 
## Negative:  FYB1, INPP4B, PRKCH, BCL11B, TC2N, CD247, ITK, THEMIS, CD3D, CAMK4 
##     AAK1, TNIK, PHACTR2, PRKCA, IL7R, SERINC5, ST8SIA1, IL6ST, IL32, IQGAP2 
##     IPCEF1, GPRIN3, LINC00861, MLLT3, SRGN, LEF1, ICOS, PLCL1, TXK, MAF 
## PC_ 3 
## Positive:  AC104170.1, MAML3, LHFPL2, FGD6, AC023590.1, RAPGEF5, CCDC88A, CCDC144A, SPRED2, RGS13 
##     STXBP6, MGLL, AL390957.1, MAPK10, PDGFD, EEPD1, SAMD12, AC012368.1, LINC00877, LOXL2 
##     LINC02099, AFF2, MED12L, NIBAN1, SERPINA9, FAM106A, EXT1, SPAG16, P2RY12, SH3RF1 
## Negative:  TOP2A, COL19A1, ASPM, UBE2C, DLGAP5, KIF14, GTSE1, HMMR, CENPE, AURKB 
##     CENPF, MKI67, TPX2, CDC20, CDCA8, PLK1, CDK1, CDCA3, KIF23, AURKA 
##     BIRC5, CR1, DEPDC1, HJURP, NDC80, CKAP2L, IGHM, KIFC1, ANLN, BUB1 
## PC_ 4 
## Positive:  AC104170.1, RAPGEF5, FGD6, AFF2, AC023590.1, LOXL2, SERPINA9, PDGFD, RGS13, CD38 
##     SPRED2, AC012368.1, BACH2, ZBTB20-AS5, MME, LHFPL2, SYBU, MCTP2, MAPK10, ZNF608 
##     SPAG16, MYBL1, MAML3, GALNT14, STXBP6, AC025569.1, LINC00877, LPP, AC019211.1, SUGCT 
## Negative:  PLXDC2, LRMDA, DOCK5, LYZ, CSF2RA, DAPK1, SLC8A1, CST3, WDFY3, RTN1 
##     TYROBP, RBM47, CPVL, MS4A6A, FLT3, LGALS2, FGL2, DST, SHTN1, HCK 
##     NEGR1, FMN1, SRGAP1, FCER1G, RGS18, SLC8A1-AS1, ZNF366, ITGAX, TNFAIP2, NHSL1 
## PC_ 5 
## Positive:  BACH2, SLC8A1, LPP, PLXDC2, LRMDA, LYZ, DOCK5, MAML3, CSF2RA, RTN1 
##     CPVL, SSBP2, PID1, WDFY3, CST3, ST18, LGALS2, VAV3-AS1, FMN1, CR2 
##     TYROBP, MS4A6A, MOB1B, SRGAP1, RNF144B, MYO1E, CD83, FGD6, EPB41L3, TLR2 
## Negative:  DERL3, MZB1, XBP1, CFAP54, FKBP11, JCHAIN, SSR4, ACOXL, FNDC3B, TXNDC5 
##     LINC02362, PRDM1, HSP90B1, AL591518.1, CREB3L2, SEC11C, IGHG1, IGHGP, TMC3-AS1, TENT5C 
##     IGHG3, PRDX4, LINC01484, SLAMF7, SELENOS, PDIA4, MANF, SEC24D, HSPA5, IGHG4
PCAPlot(tonsil_filtered,
  group.by = "library_name")

ElbowPlot(object = tonsil_filtered)

1.5.3 UMAP representation

tonsil_filtered <- RunUMAP(
  tonsil_filtered,
  dims = 1:30,
  reduction = "pca",
  reduction.name = "umap.rna",
  reduction.key = "rnaUMAP_"
)
## 16:52:57 UMAP embedding parameters a = 0.9922 b = 1.112
## 16:52:57 Read 70556 rows and found 30 numeric columns
## 16:52:57 Using Annoy for neighbor search, n_neighbors = 30
## 16:52:57 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:53:05 Writing NN index file to temp file /var/folders/kz/y10np0cj213fybqz181z9lghbzr42p/T//RtmpXJ0J8c/file3bd1134c5cca
## 16:53:05 Searching Annoy index using 1 thread, search_k = 3000
## 16:53:30 Annoy recall = 100%
## 16:53:32 Commencing smooth kNN distance calibration using 1 thread
## 16:53:36 Initializing from normalized Laplacian + noise
## 16:53:41 Commencing optimization for 200 epochs, with 3138068 positive edges
## 16:54:26 Optimization finished
rna.umap<-DimPlot(
  tonsil_filtered,
  reduction = "umap.rna",
  group.by = "library_name",
  pt.size = 0.1) + NoLegend() + ggtitle('scRNA UMAP')

rna.umap

atac.umap + rna.umap

hacer primero harmony, quitar batch effect. atac y rna. harmony

2 Run Harmony

Pass the Seurat object to the RunHarmony function and specify which variable to integrate out. A Seurat object is returned with corrected Harmony coordinates.

DefaultAssay(tonsil_filtered) <- "peaks"
tonsil_filtered <- RunHarmony(
  object = tonsil_filtered,
  reduction = "lsi",
  dims = 2:40,
  group.by.vars = "library_name",
  assay.use = "peaks",
  project.dim = FALSE,
  reduction.save = "harmony_peaks"
)
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony 4/10
## Harmony 5/10
## Harmony 6/10
## Harmony 7/10
## Harmony 8/10
## Harmony converged after 8 iterations

2.0.1 UMAP representation

tonsil_filtered <- RunUMAP(
  tonsil_filtered,
  dims = 2:40,
  reduction = "harmony_peaks",
  reduction.name = "umap.atac",
  reduction.key = "atacUMAP_"
)
## 17:00:27 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:00:27 Read 70556 rows and found 39 numeric columns
## 17:00:27 Using Annoy for neighbor search, n_neighbors = 30
## 17:00:27 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:00:36 Writing NN index file to temp file /var/folders/kz/y10np0cj213fybqz181z9lghbzr42p/T//RtmpXJ0J8c/file3bd1268c9700
## 17:00:36 Searching Annoy index using 1 thread, search_k = 3000
## 17:01:08 Annoy recall = 100%
## 17:01:09 Commencing smooth kNN distance calibration using 1 thread
## 17:01:13 Initializing from normalized Laplacian + noise
## 17:01:17 Commencing optimization for 200 epochs, with 3220948 positive edges
## 17:02:02 Optimization finished
Harm_peak<-DimPlot(
  tonsil_filtered,
  reduction = "umap.atac",
  group.by = "library_name",
  pt.size = 0.1
) + NoLegend() + ggtitle('Peak Harmony')

3 scRNA

DefaultAssay(tonsil_filtered) <- "RNA"
tonsil_filtered <- RunHarmony(
  object = tonsil_filtered,
  reduction = "pca",
  dims = 1:30,
  group.by.vars = "library_name",
  assay.use = "RNA",
  project.dim = FALSE,
  reduction.save = "harmony_rna"
)
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony 4/10
## Harmony 5/10
## Harmony 6/10
## Harmony 7/10
## Harmony 8/10
## Harmony 9/10
## Harmony converged after 9 iterations
tonsil_filtered <- RunUMAP(
  tonsil_filtered,
  dims = 2:40,
  reduction = "harmony_rna",
  reduction.name = "umap.rna",
  reduction.key = "rnaUMAP_"
)
## 17:06:57 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:06:57 Read 70556 rows and found 39 numeric columns
## 17:06:57 Using Annoy for neighbor search, n_neighbors = 30
## 17:06:57 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:07:05 Writing NN index file to temp file /var/folders/kz/y10np0cj213fybqz181z9lghbzr42p/T//RtmpXJ0J8c/file3bd130c0c14e
## 17:07:05 Searching Annoy index using 1 thread, search_k = 3000
## 17:07:33 Annoy recall = 100%
## 17:07:34 Commencing smooth kNN distance calibration using 1 thread
## 17:07:38 Initializing from normalized Laplacian + noise
## 17:07:44 Commencing optimization for 200 epochs, with 3239852 positive edges
## 17:08:29 Optimization finished
Harm_rna<-DimPlot(
  tonsil_filtered,
  reduction = "umap.rna",
  group.by = "library_name",
  pt.size = 0.1
) + NoLegend() + ggtitle('RNA Harmony')

3.1 scATAC and RNAseq Harmony

Harm_peak+Harm_rna + plot_annotation(title = 'Harmony peak and RNA UMAP visualization')

3.2 Joint UMAP visualization

# build a joint neighbor graph using both assays
tonsil_filtered <- FindMultiModalNeighbors(
  object = tonsil_filtered,
  reduction.list = list("pca", "lsi"), 
  dims.list = list(1:30, 2:40),
  modality.weight.name = "RNA.weight",
  verbose = TRUE
)
## Calculating cell-specific modality weights
## Finding 20 nearest neighbors for each modality.
## Calculating kernel bandwidths
## Warning in FindMultiModalNeighbors(object = tonsil_filtered, reduction.list =
## list("pca", : The number of provided modality.weight.name is not equal to the
## number of modalities. RNA.weight peaks.weight are used to store the modality
## weights
## Finding multimodal neighbors
## Constructing multimodal KNN graph
## Constructing multimodal SNN graph
# build a joint UMAP visualization
tonsil_filtered <- RunUMAP(
  object = tonsil_filtered,
  nn.name = "weighted.nn",
  assay = "RNA",
  verbose = TRUE
)
## 17:12:45 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:12:47 Commencing smooth kNN distance calibration using 1 thread
## 17:12:50 Initializing from normalized Laplacian + noise
## 17:12:55 Commencing optimization for 200 epochs, with 2234632 positive edges
## 17:13:42 Optimization finished
joint.umap<- DimPlot(tonsil_filtered, label = FALSE, group.by = "library_name",repel = TRUE, reduction = "umap") + plot_annotation(title = 'Joint UMAP ')+ ggtitle('Joint UMAP ')
(atac.umap + rna.umap )

joint.umap

4 Save

We will save the resulting object and use it in the following notebook to exclude doublets:

saveRDS(tonsil_filtered, path_to_save)

5 Session Information

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] es_ES.UTF-8/es_ES.UTF-8/es_ES.UTF-8/C/es_ES.UTF-8/es_ES.UTF-8
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] patchwork_1.1.1           ggplot2_3.3.5            
##  [3] dplyr_1.0.7               stringr_1.4.0            
##  [5] EnsDb.Hsapiens.v86_2.99.0 ensembldb_2.18.2         
##  [7] AnnotationFilter_1.18.0   GenomicFeatures_1.46.1   
##  [9] AnnotationDbi_1.56.2      Biobase_2.54.0           
## [11] harmony_0.1.0             Rcpp_1.0.7               
## [13] future_1.23.0             GenomicRanges_1.46.1     
## [15] GenomeInfoDb_1.30.0       IRanges_2.28.0           
## [17] S4Vectors_0.32.3          BiocGenerics_0.40.0      
## [19] SeuratObject_4.0.4        Seurat_4.0.6             
## [21] Signac_1.5.0              RColorBrewer_1.1-2       
## [23] BiocStyle_2.22.0         
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.2                  reticulate_1.22            
##   [3] tidyselect_1.1.1            RSQLite_2.2.9              
##   [5] htmlwidgets_1.5.4           grid_4.1.2                 
##   [7] docopt_0.7.1                BiocParallel_1.28.3        
##   [9] Rtsne_0.15                  munsell_0.5.0              
##  [11] codetools_0.2-18            ica_1.0-2                  
##  [13] miniUI_0.1.1.1              withr_2.4.3                
##  [15] colorspace_2.0-2            filelock_1.0.2             
##  [17] highr_0.9                   knitr_1.36                 
##  [19] ROCR_1.0-11                 tensor_1.5                 
##  [21] listenv_0.8.0               labeling_0.4.2             
##  [23] MatrixGenerics_1.6.0        slam_0.1-49                
##  [25] GenomeInfoDbData_1.2.7      polyclip_1.10-0            
##  [27] bit64_4.0.5                 farver_2.1.0               
##  [29] parallelly_1.30.0           vctrs_0.3.8                
##  [31] generics_0.1.1              xfun_0.29                  
##  [33] BiocFileCache_2.2.0         lsa_0.73.2                 
##  [35] ggseqlogo_0.1               R6_2.5.1                   
##  [37] bitops_1.0-7                spatstat.utils_2.3-0       
##  [39] cachem_1.0.6                DelayedArray_0.20.0        
##  [41] assertthat_0.2.1            promises_1.2.0.1           
##  [43] BiocIO_1.4.0                scales_1.1.1               
##  [45] gtable_0.3.0                globals_0.14.0             
##  [47] goftest_1.2-3               rlang_0.4.12               
##  [49] RcppRoll_0.3.0              splines_4.1.2              
##  [51] rtracklayer_1.54.0          lazyeval_0.2.2             
##  [53] spatstat.geom_2.3-1         BiocManager_1.30.16        
##  [55] yaml_2.2.1                  reshape2_1.4.4             
##  [57] abind_1.4-5                 httpuv_1.6.4               
##  [59] tools_4.1.2                 bookdown_0.24              
##  [61] ellipsis_0.3.2              spatstat.core_2.3-2        
##  [63] jquerylib_0.1.4             ggridges_0.5.3             
##  [65] plyr_1.8.6                  progress_1.2.2             
##  [67] zlibbioc_1.40.0             purrr_0.3.4                
##  [69] RCurl_1.98-1.5              prettyunits_1.1.1          
##  [71] rpart_4.1-15                deldir_1.0-6               
##  [73] pbapply_1.5-0               cowplot_1.1.1              
##  [75] zoo_1.8-9                   SummarizedExperiment_1.24.0
##  [77] ggrepel_0.9.1               cluster_2.1.2              
##  [79] magrittr_2.0.1              RSpectra_0.16-0            
##  [81] magick_2.7.3                data.table_1.14.2          
##  [83] scattermore_0.7             lmtest_0.9-39              
##  [85] RANN_2.6.1                  SnowballC_0.7.0            
##  [87] ProtGenerics_1.26.0         fitdistrplus_1.1-6         
##  [89] matrixStats_0.61.0          hms_1.1.1                  
##  [91] mime_0.12                   evaluate_0.14              
##  [93] xtable_1.8-4                XML_3.99-0.8               
##  [95] sparsesvd_0.2               gridExtra_2.3              
##  [97] compiler_4.1.2              biomaRt_2.50.1             
##  [99] tibble_3.1.6                KernSmooth_2.23-20         
## [101] crayon_1.4.2                htmltools_0.5.2            
## [103] mgcv_1.8-38                 later_1.3.0                
## [105] tidyr_1.1.4                 DBI_1.1.1                  
## [107] tweenr_1.0.2                dbplyr_2.1.1               
## [109] MASS_7.3-54                 rappdirs_0.3.3             
## [111] Matrix_1.3-4                parallel_4.1.2             
## [113] igraph_1.2.10               pkgconfig_2.0.3            
## [115] GenomicAlignments_1.30.0    plotly_4.10.0              
## [117] spatstat.sparse_2.1-0       xml2_1.3.3                 
## [119] bslib_0.3.1                 XVector_0.34.0             
## [121] digest_0.6.29               sctransform_0.3.2          
## [123] RcppAnnoy_0.0.19            spatstat.data_2.1-2        
## [125] Biostrings_2.62.0           rmarkdown_2.11             
## [127] leiden_0.3.9                fastmatch_1.1-3            
## [129] uwot_0.1.11                 restfulr_0.0.13            
## [131] curl_4.3.2                  shiny_1.7.1                
## [133] Rsamtools_2.10.0            rjson_0.2.20               
## [135] lifecycle_1.0.1             nlme_3.1-153               
## [137] jsonlite_1.7.2              viridisLite_0.4.0          
## [139] fansi_0.5.0                 pillar_1.6.4               
## [141] lattice_0.20-45             KEGGREST_1.34.0            
## [143] fastmap_1.1.0               httr_1.4.2                 
## [145] survival_3.2-13             glue_1.6.0                 
## [147] qlcMatrix_0.9.7             png_0.1-7                  
## [149] bit_4.0.4                   ggforce_0.3.3              
## [151] stringi_1.7.6               sass_0.4.0                 
## [153] blob_1.2.2                  memoise_2.0.1              
## [155] irlba_2.3.5                 future.apply_1.8.1